///// data set cleaner ///
cd  "C:\Users\johna\Dropbox (Curiel Analytx)\ElectionNightResults\replication_code"
import delimited using "data\nyt_counties.csv", clear varn(1)

///drop variables that are not needed 
/*
drop eevp eevp_value eevp_display eevp_source turnout_stage absentee_outstanding /*
*/ provisional_outstanding provisional_count_progress last_updated leader_margin_display /*
*/ leader_margin_name_display 
*/ 


///now destring variables 
destring tot_exp_vote, ignore("NA") replace
destring absentee_max_ballots, ignore("NA") replace
destring leader_margin_value, ignore("NA") replace
destring margin2020, ignore("NA") replace

*calculate time from closing

gen time_num = clock(time,"YMD hms")
gen hours_from_close = time_num - clock("2020-11-04 01:00:00","YMD hms")
replace hours_from_close = hours_from_close/(60*60*1000)
drop if hours_from_close < 0

gen final_votes = votes if name ~= name[_n+1]
egen max_final_votes = max(final_votes),by(name)
drop final_votes
rename max_final_votes final_votes
gen pct_reported = 100*votes/final_votes



** calculate which candidate won each county

egen max_bidenj = max(bidenj) if name ~= "STATE",by(name)
egen max_trumpd = max(trumpd) if name ~= "STATE",by(name)
gen biden_win = max_bidenj > max_trumpd

/// sort the variables now to find out if things are missing. 

sort state name hours_from_close /// appears that nothing is missing 

save "data\nyt_counties.dta"


///now for the states data 
clear


net install readhtml, from(https://ssc.wisc.edu/sscc/stata/)

** Set up Leip table
readhtmltable "https://uselectionatlas.org/RESULTS/data.php?year=2020&datatype=national&def=1&f=1&off=0&elect=0"

compress

rename t2c3 state
rename t2c4 evotes1
rename t2c5 evotes2
rename t2c14 bidenvotes
rename t2c15 trumpvotes

keep state bidenvotes trumpvotes evotes*


drop in 1/2
drop in 52/l

destring *votes, replace force ignore(",")
replace state = lower(subinstr(state," ","-",.))
replace state = "district-of-columbia" if state == "d.-c."
gen leip_tvotes = bidenvotes + trumpvotes
gen leip_dempct = 100*biden/(biden + trump)

save "data/scratch/scratch", replace


** Get poll closing times

import excel using "data\state_closing_times.xlsx", first clear

save "data\scratch\call_times", replace


** Set up auxillary data
import excel using "data\all_state_changes.xlsx", first clear
keep state timestamp leading_candidate_name trailing_candidate_name leading_candidate_votes trailing_candidate_votes

rename timestamp time

replace state = subinstr(lower(trim(substr(state,1,strpos(state," ("))))," ","-",.)

gen rep = leading_candidate_votes if leading_candidate_name == "Trump"
gen dem = leading_candidate_votes if leading_candidate_name == "Biden"
replace rep = trailing_candidate_votes if trailing_candidate_name == "Trump"
replace dem = trailing_candidate_votes if trailing_candidate_name == "Biden"

drop leading* trailing*

merge m:1 state using "data\lasttime", nogen

keep if time > lasttime_num
drop lasttime*
rename time time_num

save "data\scratch\auxil", replace


///import the NYT data for a given state 
import delimited using "data/nyt_states.csv",varn(1) clear

sort state hoursfromclose

egen maxtotal = max(total),by(state)
egen maxdem = max(dem),by(state)
egen maxrep = max(rep),by(state)
gen dempct = 100*dem/(dem+rep)

//time num data
gen time_num = clock(time,"YMD hms")
format time_num %tcCCYY-NN-DD_HH:MM:SS
append using "data\scratch\auxil"
merge m:1 state using "data\scratch\scratch",nogen
order v1 time time_num


** find the last reading in the 24 hour period and the first reading after the 24 hour perior
sort state hoursfromclose
drop if hoursfromclose < 0
bysort state: gen n24 = _n if hoursfromclose < 24
gen ex_total = total + ((24-hoursfromclose)/(hoursfromclose[_n+1] - hoursfromclose)) * (total[_n+1] - total) if n24 ~= . & n24[_n+1] == .
gen ex_dem = dem + ((24-hoursfromclose)/(hoursfromclose[_n+1] - hoursfromclose)) * (dem[_n+1] - dem) if n24 ~= . & n24[_n+1] == .
gen ex_rep = rep + ((24-hoursfromclose)/(hoursfromclose[_n+1] - hoursfromclose)) * (rep[_n+1] - rep) if n24 ~= . & n24[_n+1] == .

gen twentyfour = 24
gen zero = 0


capture gen sixty = 60
capture bysort state: gen n60 = _n if hoursfromclose < 60

** find the last reading in the 60 hour period and the first reading after the 60 hour period
gen ex_total60 = total + ((60-hoursfromclose)/(hoursfromclose[_n+1] - hoursfromclose)) * (total[_n+1] - total) if n60 ~= . & n60[_n+1] == .

//now the norm fields 

sort state time_num
replace pollsclose = pollsclose[_n-1] if pollsclose == "" & state[_n-1] == state
replace hoursfromclose = ( (time_num - clock(pollsclose,"YMD hms")) )/(1000*60*60) if hoursfromclose == .

//gen total = dem + rep
gen norm_total = 100*(dem + rep)/leip_tvotes
gen norm_dempct = dempct - leip_dempct

gen end = time_num + 24*60*60*1000


*** interpolate a value at 60 hours

** find the last norm_total value @ < 60 hours
gen last_norm_total = norm_total if hoursfromclose <=60 & hoursfromclose[_n+1] > 60
** interpolate where the point is at 60 hours
gen norm_total60 = last_norm_total + (norm_total[_n+1] - norm_total)/(hoursfromclose[_n+1] - hoursfromclose)


** find the last norm_dempct value @ < 60 hours
gen last_norm_dempct = norm_dempct if hoursfromclose <=60 & hoursfromclose[_n+1] > 60
** interpolate where the point is at 60 hours
gen norm_dempct60 = last_norm_dempct + (norm_dempct[_n+1] - norm_dempct)/(hoursfromclose[_n+1] - hoursfromclose)



//save the data 
destring evotes1, ignore("NA") replace
destring evotes2, ignore("NA") replace
save "data/nyt_states", replace

//assign colors 
global color medsl_logoblue "55 145 255"
global color medsl_lightblue "89 203 245"
global color medsl_gold "192 186 121"
global color medsl_darkblue "21 109 208"
global color medsl_darklogored "199 38 84"
global color medsl_logored "255 104 120"
global color medsl_textgrey "82 89 113"
global color medsl_chartgrey "196 196 196"
global color medsl_lightpurple "148 141 229"
global color medsl_darkgold "152 144 68"
global color medsl_lightgreen "173 204 24"
global color medsl_darkgreen "55 194 86"
global color medsl_darkorange "212 98 0"
global color medsl_orange "255 131 24"
global color medsl_yellow "235 214 0"
global color medsl_blue1 "11 46 79"
global color medsl_blue2 "4 68 139"
global color medsl_blue3 "21 109 208"
global color medsl_blue4 "55 145 255"
global color medsl_blue5 "0 186 255"
global color medsl_blue6 "89 203 245"
global color medsl_blue7 "159 221 243"
global color medsl_red1 "141 33 21"
global color medsl_red2 "205 60 43"
global color medsl_red3 "246 87 62"
global color medsl_red4 "255 113 90"
global color medsl_red5 "245 145 129"
global color medsl_red6 "232 173 164"
global color medsl_red7 "237 208 203"
global color medsl_purple1 "78 74 129"
global color medsl_purple2 "99 94 153"
global color medsl_purple3 "125 118 199"
global color medsl_purple4 "148 141 229"
global color medsl_purple5 "177 170 251"
global color medsl_purple6 "204 200 100"
global color medsl_purple7 "221 219 251"




